source('D:/OneDrive/Home/Projetos/artigos/revistas_contexto_final/R/data-prep/02.R')
library(purrr)
library(textcat)
library(genderBR)

# ---- Abstract language and author gender ----

#' Add the abstract language (LA2) and author gender (GND) to an article table.
#'
#' The AU field is split into one row per author so that every name can be
#' trimmed, title-cased and passed to `get_gender()` on its own. The rows are
#' then grouped by TI and PDFURL, the authors and genders are joined back with
#' ";" and the duplicated rows produced by the split are removed.
#'
#' @param articles Data frame with at least the columns AU, AB, TI and PDFURL.
#' @param author_sep Separator between author names in the incoming AU column.
#' @return `articles` with AU rewritten as a ";"-joined list of cleaned names
#'   and the columns LA2 (result of `textcat(AB)`) and GND (";"-joined result
#'   of `get_gender(AU)`) added.
add_language_and_gender <- function(articles, author_sep) {
  articles %>%
    # Detect the language before the author split, so each abstract is
    # classified once instead of once per author row.
    mutate(LA2 = textcat(AB)) %>%
    separate_rows(AU, sep = author_sep) %>%
    mutate(
      AU = str_to_title(str_trim(AU)),
      GND = get_gender(AU)
    ) %>%
    group_by(TI, PDFURL) %>%
    # NOTE(review): str_c() returns NA when any element is NA, so one author
    # with an NA gender turns the GND of the whole article into NA. Confirm
    # that this is the intended encoding before relying on GND downstream.
    mutate(
      AU = str_c(AU, collapse = ";"),
      GND = str_c(GND, collapse = ";")
    ) %>%
    distinct() %>%
    ungroup()
}

# Dataset without Meridiano: authors are separated by ";"
data_sem_meridiano <- add_language_and_gender(
  data_sem_meridiano,
  author_sep = ";"
)

# Dataset with Meridiano: authors are separated by ","
data_com_meridiano <- add_language_and_gender(
  data_com_meridiano,
  author_sep = ","
)


# ---- Cleaning (dataset without Meridiano) ----
#
# Corrects the language label in LA2, blanks placeholder abstracts and removes
# records whose "author" is a journal or an institution.

data_sem_meridiano <- data_sem_meridiano %>%
  mutate(
    LA2 = case_when(
      # case_when() uses the first matching rule, so the title-specific
      # overrides must come before the generic label remapping below;
      # otherwise a Spanish title labelled "german" or "catalan" would never
      # reach its override. fixed() matches the fragments literally.
      str_detect(TI, fixed("Diplomacia Y Guerra. Una Mirada De La")) ~
        "spanish",
      str_detect(
        TI,
        fixed("El Buen Vivir Del Ecuador: Crisis Del Desarrollo")
      ) ~ "spanish",
      str_detect(
        TI,
        fixed("Participación Uruguaya En El Consejo De Seguridad De La")
      ) ~ "spanish",
      str_detect(
        TI,
        fixed("El Cds Como Plataforma De La Diplomacia De Defensa Ar")
      ) ~ "spanish",
      # Labels remapped by hand for this dataset
      LA2 == "german" ~ "english",
      LA2 %in% c(
        "catalan", "middle_frisian", "basque", "finnish", "romanian",
        "russian-iso8859_5", "latin", "sanskrit", "norwegian", "irish",
        "esperanto", "breton"
      ) ~ "portuguese",
      TRUE ~ LA2
    ),
    # A lone "-" is a placeholder for a missing abstract
    AB = na_if(AB, "-"),
    # NOTE(review): this removes every ": " in the abstract, not only a
    # leading one - confirm that this is intended.
    AB = str_replace_all(AB, ": ", "")
  ) %>%
  filter(
    # filter() drops rows whose condition evaluates to NA. The original
    # comparisons therefore removed rows with a missing AU or TI; the guards
    # below keep that behaviour but make it visible.
    !is.na(AU),
    !is.na(TI),
    # Editorial / institutional records, not real authors
    !AU %in% c(
      "Equipe Monções",
      "Mural Internacional",
      "Programa De Pós-Graduação Em Relações Internacionais",
      "Revista Conjuntura Austral",
      "Conjuntura Austral",
      "Conjuntura Austral - Ufrgs",
      "Estudos Internacionais",
      "Nerint - Ufrgs"
    ),
    !str_detect(TI, "Edição Completa"),
    !str_detect(AU, "Carta Internacional")
  )






# ---- Cleaning (dataset with Meridiano) ----
#
# Corrects the language label in LA2 and blanks placeholder abstracts.

data_com_meridiano <- data_com_meridiano %>%
  mutate(
    LA2 = case_when(
      # case_when() uses the first matching rule, so the title-specific
      # overrides must come before the generic label remapping below.
      #
      # The fragments are wrapped in fixed() because they contain "?", which
      # is a quantifier in a regular expression: as regex, "G?n?ral" or
      # "Participaci?n" can never match a title holding a literal "?".
      # Assumes the titles in this dataset carry a literal "?" where an
      # accented character was lost, as the exact-match rule for
      # "Pour L?histoire ..." implies - TODO confirm against the data.
      str_detect(
        TI,
        fixed("Brief Introduction On The International Impacts Of")
      ) ~ "english",
      TI == "Pour L?histoire Des Relations Internationales" ~ "portuguese",
      str_detect(TI, fixed("Resenha De: Desportes, G?n?ral Vincent,")) ~
        "portuguese",
      str_detect(TI, fixed("El Cds Como Plataforma De La Diplomacia De")) ~
        "spanish",
      str_detect(
        TI,
        fixed("Participaci?n Uruguaya En El Consejo De Seguridad De La O")
      ) ~ "spanish",
      # The two fragments below originally ended in a "," typed inside the
      # string literal; it is dropped so the fragment is a prefix of the title.
      str_detect(
        TI,
        fixed("El Buen Vivir Del Ecuador: Crisis Del Desarrollo Y Cooperaci?")
      ) ~ "spanish",
      str_detect(TI, fixed("Diplomacia Y Guerra. Una Mirada De La")) ~
        "spanish",
      # Labels remapped by hand for this dataset
      LA2 %in% c(
        "german", "breton", "catalan", "middle_frisian", "basque", "finnish",
        "romanian", "russian-iso8859_5", "latin", "sanskrit", "norwegian",
        "irish", "esperanto"
      ) ~ "portuguese",
      TRUE ~ LA2
    ),
    # A lone "-" is a placeholder for a missing abstract
    AB = na_if(AB, "-"),
    # NOTE(review): this removes every ": " in the abstract, not only a
    # leading one - confirm that this is intended.
    AB = str_replace_all(AB, ": ", "")
  )


# ---- Combine both datasets and export ----

# Stack the two cleaned datasets (Meridiano rows first)
data <- bind_rows(data_com_meridiano, data_sem_meridiano)
data

# ---- Save as RData
save(data, file = "data/data.RData")

# ---- Save as CSV
readr::write_csv(data, "data/data.csv")

# ---- Save as TSV
# write_delim() separates fields with a single space unless `delim` is given,
# which produced a space-delimited file under a .tsv name; write_tsv() writes
# tab-separated values.
readr::write_tsv(data, "data/data.tsv")



